from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import numpy as np
from utils.mlmodel_util import get_model_info_sklearn
from utils.model_util import load_model
from utils.mlmodel_util import preprocess
from utils.format_util import dup_name_handler


def predict(dataframe, para_list, output):
    """Score ``dataframe`` with a previously saved model and append the predictions.

    Args:
        dataframe: Input data. It is modified in place: one prediction column
            is added to it.
        para_list: Parameter mapping. This function reads ``"model_id"``,
            ``"feature_col"`` (concatenated with a list, so expected to be a
            list) and ``"label_col"``; the mapping is also forwarded to
            ``preprocess``.
        output: Nested mapping. The name chosen for the prediction column is
            written to ``output["result"]["output_params"]["output_cols"]``.

    Returns:
        The same ``dataframe`` object, now carrying the prediction column.
    """
    # --------------------load the model-----------------
    model_save_path, _, feature_list = get_model_info_sklearn(para_list["model_id"])
    mlmodel = load_model(model_save_path)
    # --------------------make predictions----------------
    X = preprocess(dataframe, para_list, feature_list)
    pred = mlmodel.predict(X)
    # The default column name is checked against the feature and label column
    # names; presumably dup_name_handler returns a non-clashing variant --
    # TODO confirm against utils.format_util.
    output_cols = dup_name_handler(
        "_prediction_", para_list["feature_col"] + [para_list["label_col"]]
    )
    output["result"]["output_params"]["output_cols"] = output_cols
    dataframe[output_cols] = pred
    return dataframe


def train(dataframe, para_list, record):
    """Fit a linear regression on ``dataframe`` and record fit diagnostics.

    Two fits are run on the same design matrix: a scikit-learn
    ``LinearRegression`` (returned to the caller and used for predictions and
    error metrics) and a statsmodels OLS fit (used only for p-values,
    t-values, standard errors, AIC and BIC).

    Args:
        dataframe: Training data. It is modified in place: a ``"_prediction_"``
            column holding the in-sample predictions is added (an existing
            column with that name is overwritten).
        para_list: Parameter mapping. This function reads ``"label_col"`` and
            ``"feature_col"``; the mapping is also forwarded to ``preprocess``.
        record: Mapping that receives the diagnostics under ``"other_info"``.

    Returns:
        A ``(mlmodel, dataframe)`` tuple: the fitted ``LinearRegression`` and
        the same ``dataframe`` object with the prediction column added.
    """
    # --------------------prepare data------------------
    # An empty list is handed to preprocess and read back afterwards;
    # presumably preprocess fills it in place with the expanded feature
    # names -- TODO confirm against utils.mlmodel_util.
    feature_list = []
    X = preprocess(dataframe, para_list, feature_list)
    label_col = para_list["label_col"]
    y_true = dataframe[label_col].values

    # ---------------------model fit---------------------
    # NOTE(review): add_constant defaults to has_constant='skip', so if X
    # already contains a constant column no intercept column is added and the
    # statsmodels statistics will not line up one-to-one with
    # [intercept] + coefficients.
    X_constant = sm.add_constant(X)
    stats_model = sm.OLS(y_true, X_constant).fit()

    mlmodel = LinearRegression().fit(X, y_true)
    y_pred = mlmodel.predict(X)

    # ---------------------get metrics--------------------
    n = len(y_true)
    p = len(feature_list)
    residual_df = n - p - 1

    MSE = mean_squared_error(y_true, y_pred)
    RMSE = np.sqrt(MSE)
    MAE = mean_absolute_error(y_true, y_pred)
    r2 = mlmodel.score(X, y_true)
    # Adjusted R^2 is undefined without positive residual degrees of freedom.
    # Dividing anyway would raise ZeroDivisionError (plain float r2) or yield
    # inf/nan (NumPy float r2) when n == p + 1, so report NaN explicitly.
    if residual_df > 0:
        r2adj = 1 - ((1 - r2) * (n - 1)) / residual_df
    else:
        r2adj = float("nan")
    pvalues = stats_model.pvalues
    tvalues = stats_model.tvalues

    # ---------------------record info----------------------
    other_info = {
        "feature_list": feature_list,
        "feature_col": para_list["feature_col"],
        "coefficients": list(mlmodel.coef_),
        "intercept": mlmodel.intercept_,
        "MSE": MSE,
        "MAE": MAE,
        "RMSE": RMSE,
        "r2": r2,
        "r2adj": r2adj,
        "pvalues": list(pvalues),
        "tvalues": list(tvalues),
        "total_iterations": 1,
        "df": residual_df,
        "coefficient_standard_errors": list(stats_model.bse),
        "aic": stats_model.aic,
        "bic": stats_model.bic,
    }
    record["other_info"] = other_info

    output_cols = "_prediction_"
    dataframe[output_cols] = y_pred
    return mlmodel, dataframe
